********************************************************************************
*																		       *
* 							VAP								 				   *
*								     									       *
********************************************************************************

* -----> This do-file: Figure A2

********************************************************************************

* Session settings: no output paging, empty memory, then the size limits.
set more off
clear all
set matsize 3000
set maxvar 10000

************************************


* Path
* Only the Dropbox root depends on the machine; the project sub-folders below
* it are the same everywhere, so the root is chosen once and reused.
if inlist("`c(username)'", "Juan S. Morales", "jmorales") {
	local dropbox "C:/Users/`c(username)'/Dropbox/"
}
else {
	local dropbox "/Users/JNG/Dropbox/"
}
local project "`dropbox'adamowicz/4_Draft/CPS_FINAL_submission/"

* Globals used by the rest of the do-file (each ends with a trailing slash).
global PathData = "`project'Data/"
global PathFig = "`project'Figures/"
global PathTab = "`project'Tables/"

* Load the tweet-level data.
use "${PathData}tweets_poland.dta", clear

***Basic data cleaning and generate basic controls****
* Bring in the mayor indicator by account handle, remove accounts flagged as
* mayors, then discard the indicator and the merge bookkeeping variable.
* The old-style merge syntax needs the master sorted on the key, hence the sort.
* NOTE(review): the old syntax also expects mayors.dta to be sorted on
* screen_name - confirm.
sort screen_name
merge screen_name using "${PathData}mayors.dta"
keep if mayor != 1
drop mayor _merge

* Split the timestamp string into numeric calendar components.
* NOTE(review): the substr() offsets assume created_at is laid out as
* "YYYY-MM-DD HH..." - confirm against the raw data.
gen date = substr(created_at,1,10)
gen month = substr(date,6,2)
gen year = substr(date,1,4)
gen day = substr(date,9,2)
gen hour = substr(created_at,12,2)
destring month year day hour, replace

* Month counter: 1 = January 2012.
gen time = (year-2012)*12 + month

* Day counter: 1 = 1 January 2012.
* Computed from Stata's own calendar instead of a hand-written month table.
* The previous formula added a single leap day (for 2012) and was therefore
* one day short for every date after 29 February 2016.
gen timeD = mdy(month, day, year) - mdy(1, 1, 2012) + 1

* Week counter derived from the day counter.
gen week = round(timeD/7)

* Stata daily date (days since 01jan1960), used for all event-time arithmetic.
gen date2 = mdy(month, day, year)

* Normalise the tweet text in place: lower-case, strip accents, turn
* punctuation into blanks, and squeeze repeated blanks.
replace text = lower(text)

* Accented vowels -> plain vowels (paired by position in the two lists).
local accented "á é í ó ú"
local plain "a e i o u"
forvalues j = 1/5 {
	local from : word `j' of `accented'
	local to : word `j' of `plain'
	replace text = subinstr(text, "`from'", "`to'",.)
}
replace text = subinstr(text, "ñ", "nh",.)

* Double quotes are removed outright; the other punctuation marks become a
* single blank so that neighbouring words stay separated.
replace text = subinstr(text, `"""',  "", .)
foreach mark in "," "." "-" "!" "/" "…" ":" ";" {
	replace text = subinstr(text, "`mark'", " ",.)
}
* replace text = subinstr(text, "#", "",.)
* replace text = subinstr(text, "@", "",.)

* Seven passes of "two blanks -> one blank" to collapse runs of blanks.
forvalues pass = 1/7 {
	replace text = subinstr(text, "  ", " ",.)
}
* 0/1 content flags built from the cleaned text. Any gg_* variables left over
* from an earlier run are removed first.
capture drop gg_*
gen gg_hashtag = (strpos(text, "#") != 0)
gen gg_at = (strpos(text, "@") != 0)
gen gg_reply = !(in_reply_to_user == "")
* Original note: "at zero for this dataset" (unclear which flag it refers to - TODO confirm)
* Retweet flag: the text begins with "rt ".
gen gg_rt = (substr(text, 1, 3) == "rt ")
gen gg_http = (strpos(text, "http") != 0)

***merge MP data****

* Attach the MP-level variables to every tweet by account handle.
* m:1 (many tweets, one MP record) replaces the former m:m merge. m:m pairs
* rows with a duplicated key by their position within the group, so a
* duplicated screen_name in mp.dta would silently produce order-dependent
* matches. With m:1 Stata stops with an error if screen_name is not unique in
* mp.dta, which makes such a problem visible.
* The merge result is kept as mergeMP; no rows are dropped here.
merge m:1 screen_name using "${PathData}mp.dta"
rename _merge mergeMP

***** Coding VARIABLES (general) *****

* Day of week coded 1 = Monday ... 7 = Sunday. Stata date 0 (01jan1960) is a
* Friday, so shifting by 3 before taking the remainder puts Monday at 0.
gen dOfWeek = mod(date2 - 3, 7) + 1

* Rebuild the retweet flag (text begins with "rt ").
drop gg_rt
gen gg_rt = (substr(text, 1, 3) == "rt ")

* Tweet mentions "adamowicz" anywhere in the cleaned text.
gen gg_adamowicz = (strpos(text, "adamowicz") != 0)

* Log engagement measures; the +1 keeps zero counts defined.
gen log_rt = ln(rt_count + 1)
gen log_fav = ln(favcount + 1)
gen log_engagement = ln(rt_count + favcount + 1)

* Numeric account identifier and number of tweets per account-day.
egen userid = group(screen_name)
bysort date2 userid: gen N_du = _N



**** Coding TREATMENT (Adamowicz) *******

* Event date as a Stata daily date (21562 = 13jan2019).
local cutoff 21562

* Days relative to the event and its square and cube.
gen dayC_ada = date2 - `cutoff'
gen dayC_sq_ada = dayC_ada*dayC_ada
gen dayC_cu_ada = dayC_ada*dayC_ada*dayC_ada

* Post-event indicator.
* NOTE(review): a missing date2 compares as larger than any number in Stata,
* so rows without a date get post_ada = 1 - confirm this is harmless.
gen post_ada = date2>=`cutoff'

* Interactions of each polynomial term with the post indicator, the opposition
* indicator, and both together (same creation order as before).
foreach stem in dayC dayC_sq dayC_cu {
	gen `stem'_post_ada = `stem'_ada*post_ada
}
foreach stem in dayC dayC_sq dayC_cu {
	gen `stem'_opos_ada = `stem'_ada*opposition
}
foreach stem in dayC dayC_sq dayC_cu {
	gen `stem'_opos_post_ada = `stem'_ada*opposition*post_ada
}

* Post indicator by group.
gen post_opo_ada = post_ada*opposition
gen post_gob_ada = post_ada*government

* Polynomial terms in the hour of day.
gen hour_sq = hour*hour
gen hour_cu = hour*hour*hour

* Running tweet index within account.
bysort userid: gen n_u = _n

* Signed distance in days to the nearest event date. 99999 is the starting
* sentinel and remains in place wherever date2 is missing. The list currently
* holds a single event date.
local events 21562
gen ffd_dayssinceE_ada = 99999
gen ffd_targetdate_ada = 21562
foreach evt of local events {
	replace ffd_dayssinceE_ada = (date2-`evt') if (abs(date2-`evt')<=abs(ffd_dayssinceE_ada))
	replace ffd_targetdate_ada = `evt' if (abs(date2-`evt')<=abs(ffd_dayssinceE_ada))
}
gen event_days_ada = date2-21562

* Window dummies 0..5: a tweet is inside window k when its distance d
* satisfies -(h+1) < d < h, with h taken from the list below.
local k = 0
foreach h in 2 5 10 15 20 30 {
	gen timeWindow`k'_ada = ffd_dayssinceE_ada>-(`h'+1) & ffd_dayssinceE_ada<`h'
	local ++k
}



* Restrict to tweets inside window 3 (distance strictly between -16 and 15 days).
keep if timeWindow3_ada != 0

* One row per account, holding the number of its tweets in the window.
gen count = 1
collapse (sum) count, by(userid)
label variable count "Tweets per actor (15-day time window)"

* Frequency histogram of tweets per account, 20 bins.
histogram count, freq bin(20)
